package com.ustcinfo.study.scala.r1.cuiyang

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created with IntelliJ IDEA.
  * Description: word-frequency count (prints the ten most frequent tokens)
  * Author: cuiyang
  * Date: 2018-08-03-23:53
  */
object Sample {

  /**
    * Entry point: counts how often each space-separated token occurs in the
    * sample file and prints the ten most frequent ones as `(count, token)` pairs.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    // Run Spark in-process ("local" master) under the application name "test".
    val sparkConf = new SparkConf().setMaster("local").setAppName("test")
    val sc = new SparkContext(sparkConf)

    try {
      // Read the input file as an RDD of text lines.
      val txtRdd = sc.textFile("src/main/resources/sampleData/dnsSample")

      txtRdd
        // Drop lines that are empty or contain only whitespace.
        .filter(line => line.trim().length > 0)
        // Split each line on single spaces and flatten into one token stream.
        // Lines are not trimmed before splitting, so leading or consecutive
        // spaces yield empty-string tokens that are counted like any other.
        .flatMap(line => line.split(" "))
        // Pair each token with 1, then sum the ones per token.
        .map(token => (token, 1))
        .reduceByKey(_ + _)
        // Swap to (count, token) so the count becomes the sort key, highest first.
        .map(_.swap)
        .sortByKey(ascending = false)
        // take(10) returns the first ten pairs to the driver as a local Array.
        .take(10)
        .foreach(println)
    } finally {
      // Always release the SparkContext, even when the job throws.
      sc.stop()
    }
  }

}
